
library(tidyverse)

library(parallel)

# Functions

# Estimate, for the tweets posted around one time point, the effect of the
# current treatment flag on retweet counts after nearest-neighbour matching.
#
# Besides its argument the function reads three objects that must exist in the
# session it runs in (the script ships them to the workers with
# clusterExport()):
#   rtqtrp_network.el     interaction edge list; the columns used here are
#                         from_user, to_user, to_tweet, created_at and what
#   tweets_engagement.dt  tweet table with tweet_id, user_id, created_at, rt_n
#                         and the covariates named in the model formulas
#   treat                 treatment indicator, assigned as a column of
#                         tweets_engagement.dt
#
# Args:
#   this_time: centre of the window; anything as.POSIXct() accepts.
#
# Returns:
#   NULL when the window contains no tweets or matchit() fails. Otherwise a
#   list with
#     subclass  data.frame(time, tweet_id, rt_n, treat, subclass), one row per
#               tweet in the window
#     fit       data.frame(time, est, sterr, pv, what, var) holding the
#               cluster-robust coefficient tests of the Poisson models for
#               'rt_n_unreciprocated' and 'rt_n'; models that could not be
#               fitted are left out, and `fit` is NULL when both failed.
matchingFun <- function(this_time) {
  
  require(dplyr)
  require(reshape2)
  require(MatchIt)
  require(lmtest)
  require(sandwich)
  require(data.table)
  
  this_time <- 
    as.POSIXct(this_time)
  
  # Current window: [this_time - 2h, this_time + 2h).
  # Look-back window for earlier interactions: [this_time - 7d, this_time - 2h].
  window_from <- this_time - 3600 * 2
  window_to <- this_time + 3600 * 2
  past_from <- this_time - 3600 * 24 * 7
  
  tweets_engagement.dt$treat <- 
    treat
  
  this_engagement <- 
    tweets_engagement.dt[created_at >= window_from & created_at < window_to]
  
  # Nothing to match in an empty window. Return the same NULL the failed
  # matchit() call used to produce, without doing the joins first.
  if (nrow(this_engagement) == 0L) return()
  
  this_reach.dt <- 
    rtqtrp_network.el[created_at >= window_from & created_at < window_to]
  
  this_reach_past.dt <- 
    rtqtrp_network.el[created_at >= past_from & created_at <= window_from]
  
  # Earlier interactions X -> Y for which the opposite direction Y -> X occurs
  # in the current window.
  # NOTE(review): the rows kept here are the *earlier* edges, so `to_tweet`
  # below is the id of the earlier target tweet, while the table it is joined
  # to further down only holds tweets from the current window. Confirm that
  # this is the intended direction of the join. Repeated pairs in the current
  # window also repeat the matching earlier edges.
  this_existing_past_reciprocity <-
    this_reach_past.dt %>%
    inner_join(this_reach.dt %>%
                 dplyr::select(from_user, to_user), 
               by = c('from_user'='to_user',
                      'to_user'='from_user'))
  
  # One row per (to_user, to_tweet) with a count column per interaction type.
  this_reach_stats.df <- 
    this_existing_past_reciprocity %>%
    dplyr::ungroup() %>%
    dplyr::group_by(to_user, to_tweet, what) %>%
    dplyr::count() %>%
    tidyr::pivot_wider(id_cols = c(to_user, to_tweet),
                       names_from = what,
                       values_from = n,
                       values_fill = 0)
  
  this_engagement <-
    this_engagement %>%
    left_join(this_reach_stats.df, by = c("user_id" = "to_user",
                                          "tweet_id" = "to_tweet"))
  
  # The `retweet` column only exists when at least one counted interaction was
  # a retweet; tweets without a match come back as NA from the join.
  if (is.null(this_engagement$retweet)) {
    this_engagement$retweet <- 0
  }
  
  this_engagement$retweet[is.na(this_engagement$retweet)] <- 0
  
  this_engagement$rt_n_unreciprocated <-
    this_engagement$rt_n - this_engagement$retweet
  
  if (any(this_engagement$rt_n_unreciprocated < 0)) {
    stop("rt_n_unreciprocated < 0")
  }
  
  m.out <-
    try({
      matchit(treat ~
                user_favourites_count + user_followers_count + user_friends_count +
                user_listed_count + user_statuses_count + user_verified + url + media,
              data = this_engagement,
              method = "nearest", distance = "glm")
    }, silent = TRUE)
  
  # inherits() instead of class(x) == "...": class() may return several
  # entries, which makes the comparison a vector.
  if (inherits(m.out, "try-error")) return()
  
  m.subclass <- 
    data.frame(time = this_time,
               tweet_id = this_engagement$tweet_id,
               rt_n = this_engagement$rt_n,
               treat = m.out$treat,
               subclass = as.character(m.out$subclass))
  
  m.data <- 
    match.data(m.out)
  
  # Fit one weighted Poisson model on the matched sample and return its
  # coefficient table with standard errors clustered on the matched pair, or
  # NULL when the fit or the test fails.
  # Kept local on purpose: only matchingFun itself is sent to the workers, and
  # the helper reads `m.data` under that exact name because the clustered
  # variance code looks the data up again through the model call.
  summarise_fit <- function(model_formula, outcome_label) {
    
    fit <- 
      try({
        glm(model_formula,
            data = m.data, weights = weights, family = 'poisson')
      })
    
    # A fitted glm has class c("glm", "lm"); comparing that with != yields a
    # length-2 condition, which if() rejects in R >= 4.2.
    if (inherits(fit, "try-error")) return(NULL)
    
    coeftest_res <- 
      try({coeftest(fit, vcov. = vcovCL, cluster = ~subclass)})
    
    if (inherits(coeftest_res, "try-error")) return(NULL)
    
    # Columns 1, 2 and 4 of the coeftest matrix: estimate, standard error and
    # p-value. The coefficient names become the row names.
    data.frame(time = this_time,
               est = coeftest_res[, 1],
               sterr = coeftest_res[, 2],
               pv = coeftest_res[, 4],
               what = outcome_label,
               stringsAsFactors = FALSE)
  }
  
  res_unreciprocated <-
    summarise_fit(rt_n_unreciprocated ~ treat +
                    user_favourites_count + user_followers_count + user_friends_count +
                    user_listed_count + user_statuses_count + user_verified + url + media,
                  'rt_n_unreciprocated')
  
  res_all <-
    summarise_fit(rt_n ~ treat +
                    user_favourites_count + user_followers_count + user_friends_count +
                    user_listed_count + user_statuses_count + user_verified + url + media,
                  'rt_n')
  
  # rbind() skips NULL inputs, so this is NULL only when both models failed.
  # When both succeeded the coefficient names repeat and rbind() makes the
  # row names unique, so the second model's `var` values carry a suffix.
  this_res <- 
    rbind(res_unreciprocated, res_all)
  
  if (!is.null(this_res)) {
    
    this_res$var <- rownames(this_res)
    rownames(this_res) <- NULL
    
  }
  
  return(list(subclass = m.subclass, fit = this_res))
  
}

# Pooled variant of the matching analysis: match once over every 24-hour bin
# that contains at least one treated tweet, with the bin itself as an extra
# covariate, then fit a weighted Poisson model for rt_n on the matched sample.
#
# Reads the global table `tweets_engagement.dt` (needs cut_24_hours, rt_n and
# the covariates named in the formulas).
#
# Args:
#   treat: treatment indicator, assigned as a column of tweets_engagement.dt
#          and also used to index its cut_24_hours column.
#
# Returns:
#   NULL when matchit() fails; otherwise list(m.out = <matchit object>,
#   fit = <glm fit>). `fit` is the "try-error" object when glm() itself
#   failed, so check it with inherits(fit, "try-error") before use.
matchingFun2 <- function(treat) {
  
  require(dplyr)
  require(reshape2)
  require(MatchIt)
  require(lmtest)
  require(sandwich)
  require(data.table)
  
  
  tweets_engagement.dt$treat <- 
    treat
  
  # 24-hour bins that contain treated tweets; only tweets from those bins
  # enter the matching.
  these_24_hours <-
    tweets_engagement.dt$cut_24_hours[tweets_engagement.dt$treat]
  
  this_engagement <- 
    tweets_engagement.dt[tweets_engagement.dt$cut_24_hours %in% these_24_hours,]
  
  m.out <-
    try({
      matchit(treat ~
                user_favourites_count + user_followers_count + user_friends_count +
                user_listed_count + user_statuses_count + user_verified + url + media
              + cut_24_hours,
              data = this_engagement,
              method = "nearest", distance = "glm")
    }, silent = TRUE)
  
  # inherits() copes with objects whose class() has more than one entry.
  if (inherits(m.out, "try-error")) return()
  
  m.data <- 
    match.data(m.out)
  
  fit <- 
    try({
      glm(rt_n ~ treat +
            user_favourites_count + user_followers_count + user_friends_count +
            user_listed_count + user_statuses_count + user_verified + url + media +
            cut_24_hours, 
          data = m.data, weights = weights, family = 'poisson')
    })
  
  
  return(list('m.out' = m.out, 'fit' = fit))
  
}


# Load global data

# Restore the saved coding tables. load() returns nothing useful here: each
# file recreates whatever objects were saved into it, so the object names used
# further down are presumably defined by these files and by load-prepare.R.
for (socialsense_rdata_file in c(
  "/pvol/rstudio/socialsense/R/analysis/coded_user_ids_descriptions.RData",
  "/pvol/rstudio/socialsense/R/analysis/australia_news_sources.RData",
  "/pvol/rstudio/socialsense/R/analysis/urls_mediabias.dt.RData",
  "/pvol/rstudio/socialsense/R/analysis/coded_opinions_twt_users.RData"
)) {
  load(socialsense_rdata_file)
}

# Drop the loop helper so the workspace only holds the loaded objects.
rm(socialsense_rdata_file)

source("/pvol/rstudio/socialsense/R/analysis/scripts/load-prepare.R")

# Bushfires

# bushfire_tweets_engagement.dt <-
#   bushfire_tweets.dt %>%
#   dplyr::filter(is.na(retweeted_status_id)) %>%
#   dplyr::select(tweet_id, user_id, created_at) %>%
#   left_join(bushfire_tweets.dt %>%
#               dplyr::group_by(tweet_id = quoted_status_id) %>%
#               dplyr::summarize(qt_n = n()),
#             by = c('tweet_id')) %>%
#   left_join(bushfire_tweets.dt %>%
#               dplyr::group_by(tweet_id = retweeted_status_id) %>%
#               dplyr::summarize(rt_n = n()),
#             by = c('tweet_id')) %>%
#   left_join(bushfire_tweets.dt %>%
#               dplyr::group_by(tweet_id = in_reply_to_status_id) %>%
#               dplyr::summarize(rp_n = n()),
#             by = c('tweet_id')) %>%
#   left_join(bushfire_users.dt %>%
#               dplyr::select(tweet_id,
#                             user_id,
#                             user_favourites_count,
#                             user_followers_count,
#                             user_friends_count,
#                             user_listed_count,
#                             user_statuses_count,
#                             user_verified) %>%
#               dplyr::distinct(tweet_id, user_id, .keep_all = T),
#             by = c("tweet_id", "user_id"))
# 
# bushfire_tweets_engagement.dt$qt_n[
#   is.na(bushfire_tweets_engagement.dt$qt_n)] <- 0
# 
# bushfire_tweets_engagement.dt$rt_n[
#   is.na(bushfire_tweets_engagement.dt$rt_n)] <- 0
# 
# bushfire_tweets_engagement.dt$rp_n[
#   is.na(bushfire_tweets_engagement.dt$rp_n)] <- 0
# 
# bushfire_tweets_engagement.dt$far_right <-
#   bushfire_tweets_engagement.dt$user_id %in% farright_user_ids
# 
# bushfire_tweets_engagement.dt$climateaction <-
#   bushfire_tweets_engagement.dt$user_id %in% climateaction_user_ids
# 
# bushfire_tweets_engagement.dt$journalist <-
#   bushfire_tweets_engagement.dt$user_id %in% journalist_user_ids
# 
# bushfire_tweets_engagement.dt$politician <-
#   bushfire_tweets_engagement.dt$user_id %in% politician_user_ids
# 
# bushfire_tweets_engagement.dt$government <-
#   bushfire_tweets_engagement.dt$user_id %in% government_user_ids
# 
# bushfire_tweets_engagement.dt$url <-
#   bushfire_tweets_engagement.dt$tweet_id %in% bushfire_urls.dt$tweet_id
# 
# bushfire_tweets_engagement.dt$url_youtube <-
#   bushfire_tweets_engagement.dt$tweet_id %in%
#   bushfire_urls.dt$tweet_id[bushfire_urls.dt$domain == 'youtu.be']
# 
# bushfire_tweets_engagement.dt$url_facebook <-
#   bushfire_tweets_engagement.dt$tweet_id %in%
#   bushfire_urls.dt$tweet_id[bushfire_urls.dt$domain == 'www.facebook.com']
# 
# bushfire_tweets_engagement.dt$url_mainstream <-
#   bushfire_tweets_engagement.dt$tweet_id %in%
#   bushfire_urls.dt$tweet_id[gsub("http(s)?://|www\\.|/", "", bushfire_urls.dt$domain) %in%
#                               australia_news_sources$domain]
# 
# bushfire_tweets_engagement.dt$url_twitter <-
#   bushfire_tweets_engagement.dt$tweet_id %in%
#   bushfire_urls.dt$tweet_id[bushfire_urls.dt$domain == 'twitter.com']
# 
# bushfire_tweets_engagement.dt$url_credibility <-
#   bushfire_urls_mediabias.dt$Credibility[match(bushfire_tweets_engagement.dt$tweet_id,
#                                                bushfire_urls_mediabias.dt$tweet_id)]
# 
# bushfire_tweets_engagement.dt$url_bias <-
#   bushfire_urls_mediabias.dt$Bias[match(bushfire_tweets_engagement.dt$tweet_id,
#                                         bushfire_urls_mediabias.dt$tweet_id)]
# 
# bushfire_tweets_engagement.dt$media <-
#   bushfire_tweets_engagement.dt$tweet_id %in% bushfire_media.dt$tweet_id
# 
# bushfire_tweets_engagement.dt$conspiracy_theory <-
#   bushfire_tweets_engagement.dt$tweet_id %in% conspiracy_theory_twt
# 
# bushfire_tweets_engagement.dt$misinformation <-
#   bushfire_tweets_engagement.dt$tweet_id %in% misinformation_twt
# 
# bushfire_tweets_engagement.dt$problematic_speech <-
#   bushfire_tweets_engagement.dt$tweet_id %in% problematic_speech_twt
# 
# bushfire_tweets_engagement.dt$date <-
#   as.Date(bushfire_tweets_engagement.dt$created_at)
# 
# bushfire_rtqtrp_network.el <-
#   bushfire_tweets.dt %>%
#   dplyr::filter(!is.na(in_reply_to_user_id)) %>%
#   dplyr::select(from_user = user_id,
#                 to_user = in_reply_to_user_id,
#                 to_tweet = in_reply_to_status_id,
#                 created_at,
#                 tweet_id) %>%
#   dplyr::mutate(what = 'reply') %>%
#   dplyr::bind_rows(bushfire_tweets.dt %>%
#                      dplyr::filter(!is.na(quoted_status_user_id)) %>%
#                      dplyr::select(from_user = user_id,
#                                    to_user = quoted_status_user_id,
#                                    to_tweet = quoted_status_id,
#                                    created_at,
#                                    tweet_id) %>%
#                      dplyr::mutate(what = 'quote')) %>%
#   dplyr::bind_rows(bushfire_tweets.dt %>%
#                      dplyr::filter(!is.na(retweeted_status_user_id)) %>%
#                      dplyr::select(from_user = user_id,
#                                    to_user = retweeted_status_user_id,
#                                    to_tweet = retweeted_status_id,
#                                    created_at,
#                                    tweet_id) %>%
#                      dplyr::mutate(what = 'retweet'))
# 
# roll_seq <-
#   levels(cut(bushfire_tweets_engagement.dt$created_at, "2 hours"))
# 
# bushfire_tweets_engagement.dt$cut_2_hours <-
#   cut(bushfire_tweets_engagement.dt$created_at, "2 hours")
# 
# bushfire_tweets_engagement.dt$cut_24_hours <-
#   cut(bushfire_tweets_engagement.dt$created_at, "24 hours")
# 
# bushfire_matching.df <-
#   data.frame()
# 
# rtqtrp_network.el <-
#   bushfire_rtqtrp_network.el
# 
# tweets_engagement.dt <-
#   bushfire_tweets_engagement.dt
# 
# print("Parallel on bushfires...")
# 
# cl <- makeCluster(14)
# 
# bushfire_par_res <- list()
# 
# clusterExport(cl, c("rtqtrp_network.el", "tweets_engagement.dt"))
# 
# ## 0: Random
# 
# random_user_ids <-
#   sample(unique(bushfire_users.dt$user_id), 1000)
# 
# treat <-
#   tweets_engagement.dt$user_id %in% random_user_ids
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['random_1']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# random_user_ids <-
#   sample(unique(bushfire_users.dt$user_id), 1000)
# 
# treat <-
#   tweets_engagement.dt$user_id %in% random_user_ids
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['random_2']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# random_user_ids <-
#   sample(unique(bushfire_users.dt$user_id), 1000)
# 
# treat <-
#   tweets_engagement.dt$user_id %in% random_user_ids
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['random_3']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# ## 1: Far-right
# 
# treat <-
#   tweets_engagement.dt$far_right
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['far_right']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# ## 1a: ClimateAction
# 
# treat <-
#   tweets_engagement.dt$climateaction
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['climateaction']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# ## 2: Journalists
# 
# treat <-
#   tweets_engagement.dt$journalist
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['journalist']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# ## 3: Politicians
# 
# treat <-
#   tweets_engagement.dt$politician
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['politician']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# ## 4: Government
# 
# treat <-
#   tweets_engagement.dt$government
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['government']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# # Opinions
# 
# # 1: Problematic speech + Far-right
# 
# treat <-
#   tweets_engagement.dt$far_right &
#   (tweets_engagement.dt$conspiracy_theory |
#      tweets_engagement.dt$misinformation |
#      tweets_engagement.dt$problematic_speech)
# 
# bushfire_par_res[['problematic_speech_broad_farright']] <-
#   matchingFun2(treat)
# 
# # 2: Problematic speech
# 
# treat <-
#   tweets_engagement.dt$problematic_speech
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['problematic_speech']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# # 3: Misinformation
# 
# treat <-
#   tweets_engagement.dt$misinformation
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['misinformation']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# # 4: Conspiracy theory
# 
# treat <-
#   tweets_engagement.dt$conspiracy_theory
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['conspiracy_theory']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# ## Links
# 
# tweets_engagement.dt <-
#   tweets_engagement.dt[tweets_engagement.dt$url == TRUE,]
# 
# ## 0: Random
# 
# random_user_ids <-
#   sample(unique(tweets_engagement.dt$user_id), 1000)
# 
# treat <-
#   tweets_engagement.dt$user_id %in% random_user_ids
# 
# clusterExport(cl, c("tweets_engagement.dt", "treat"))
# 
# bushfire_par_res[['random_link_1']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# random_user_ids <-
#   sample(unique(bushfire_users.dt$user_id), 1000)
# 
# treat <-
#   tweets_engagement.dt$user_id %in% random_user_ids
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['random_link_2']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# random_user_ids <-
#   sample(unique(bushfire_users.dt$user_id), 1000)
# 
# treat <-
#   tweets_engagement.dt$user_id %in% random_user_ids
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['random_link_3']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# 
# ## 1: Low credibility links
# 
# 
# treat <-
#   tweets_engagement.dt$url_credibility %in% "Low Credibility"
# 
# clusterExport(cl, c("treat"))
# 
# bushfire_par_res[['low_credibility']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# ## 2: High credibility links
# 
# treat <-
#   tweets_engagement.dt$url_credibility %in% "High Credibility"
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['high_credibility']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# # 3: Pseudoscience-Conspiracy
# 
# treat <-
#   tweets_engagement.dt$url_bias %in% "Conspiracy-Pseudoscience"
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['conspiracy_link']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# # 4: Facebook
# 
# treat <-
#   tweets_engagement.dt$url_facebook
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['facebook_link']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# # 5: Youtube
# 
# treat <-
#   tweets_engagement.dt$url_youtube
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['youtube_link']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# # 6: Mainstream media
# 
# treat <-
#   tweets_engagement.dt$url_mainstream
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['mainstream_link']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# 
# # 7: Twitter
# 
# treat <-
#   tweets_engagement.dt$url_twitter
# 
# clusterExport(cl, 'treat')
# 
# bushfire_par_res[['twitter_link']] <-
#   parLapply(cl, roll_seq, matchingFun)
# 
# save(bushfire_par_res, file = "/pvol/rstudio/socialsense/R/analysis/bushfire_par_res.RData")
# 
# stopCluster(cl)
# 
# rm(bushfire_par_res)

# Covid

require(parallel)

# Engagement table for the COVID tweets that are not retweets: quote, retweet
# and reply counts per tweet plus the author's profile statistics.
covid_tweets_engagement.dt <-
  covid_tweets.dt %>%
  dplyr::filter(is.na(retweeted_status_id)) %>%
  dplyr::select(tweet_id, user_id, created_at) %>%
  # qt_n: number of rows quoting the tweet
  left_join(
    covid_tweets.dt %>%
      dplyr::group_by(tweet_id = quoted_status_id) %>%
      dplyr::summarize(qt_n = n()),
    by = "tweet_id"
  ) %>%
  # rt_n: number of rows retweeting the tweet
  left_join(
    covid_tweets.dt %>%
      dplyr::group_by(tweet_id = retweeted_status_id) %>%
      dplyr::summarize(rt_n = n()),
    by = "tweet_id"
  ) %>%
  # rp_n: number of rows replying to the tweet
  left_join(
    covid_tweets.dt %>%
      dplyr::group_by(tweet_id = in_reply_to_status_id) %>%
      dplyr::summarize(rp_n = n()),
    by = "tweet_id"
  ) %>%
  # author statistics, one row per (tweet_id, user_id)
  left_join(
    covid_users.dt %>%
      dplyr::select(
        tweet_id,
        user_id,
        user_favourites_count,
        user_followers_count,
        user_friends_count,
        user_listed_count,
        user_statuses_count,
        user_verified
      ) %>%
      dplyr::distinct(tweet_id, user_id, .keep_all = T),
    by = c("tweet_id", "user_id")
  )

# Tweets without a row in a count table come back as NA from the joins;
# store 0 for them instead.
covid_tweets_engagement.dt$qt_n <-
  replace(covid_tweets_engagement.dt$qt_n,
          is.na(covid_tweets_engagement.dt$qt_n),
          0)

covid_tweets_engagement.dt$rt_n <-
  replace(covid_tweets_engagement.dt$rt_n,
          is.na(covid_tweets_engagement.dt$rt_n),
          0)

covid_tweets_engagement.dt$rp_n <-
  replace(covid_tweets_engagement.dt$rp_n,
          is.na(covid_tweets_engagement.dt$rp_n),
          0)

# Author flags: does the author appear in the respective coded id vector?
covid_tweets_engagement.dt$far_right <-
  covid_tweets_engagement.dt$user_id %in%
  farright_user_ids

covid_tweets_engagement.dt$journalist <-
  covid_tweets_engagement.dt$user_id %in%
  journalist_user_ids

covid_tweets_engagement.dt$politician <-
  covid_tweets_engagement.dt$user_id %in%
  politician_user_ids

covid_tweets_engagement.dt$government <-
  covid_tweets_engagement.dt$user_id %in%
  government_user_ids

# Link flags: any link at all, then links whose recorded domain equals one
# specific host.
covid_tweets_engagement.dt$url <-
  covid_tweets_engagement.dt$tweet_id %in%
  covid_urls.dt$tweet_id

covid_tweets_engagement.dt$url_youtube <-
  covid_tweets_engagement.dt$tweet_id %in%
  covid_urls.dt$tweet_id[covid_urls.dt$domain == 'youtu.be']

covid_tweets_engagement.dt$url_facebook <-
  covid_tweets_engagement.dt$tweet_id %in%
  covid_urls.dt$tweet_id[covid_urls.dt$domain == 'www.facebook.com']

# Mainstream: the domain, stripped of scheme, "www." and slashes, is listed in
# australia_news_sources.
covid_tweets_engagement.dt$url_mainstream <-
  covid_tweets_engagement.dt$tweet_id %in%
  covid_urls.dt$tweet_id[
    gsub("http(s)?://|www\\.|/", "", covid_urls.dt$domain) %in%
      australia_news_sources$domain
  ]

covid_tweets_engagement.dt$url_twitter <-
  covid_tweets_engagement.dt$tweet_id %in%
  covid_urls.dt$tweet_id[covid_urls.dt$domain == 'twitter.com']

# Credibility / bias label of the first media-bias row found for the tweet;
# NA when the tweet has none.
covid_tweets_engagement.dt$url_credibility <-
  covid_urls_mediabias.dt$Credibility[
    match(covid_tweets_engagement.dt$tweet_id,
          covid_urls_mediabias.dt$tweet_id)
  ]

covid_tweets_engagement.dt$url_bias <-
  covid_urls_mediabias.dt$Bias[
    match(covid_tweets_engagement.dt$tweet_id,
          covid_urls_mediabias.dt$tweet_id)
  ]

covid_tweets_engagement.dt$media <-
  covid_tweets_engagement.dt$tweet_id %in%
  covid_media.dt$tweet_id

# Content flags from the hand-coded tweet id vectors.
covid_tweets_engagement.dt$conspiracy_theory <-
  covid_tweets_engagement.dt$tweet_id %in%
  conspiracy_theory_twt

covid_tweets_engagement.dt$misinformation <-
  covid_tweets_engagement.dt$tweet_id %in%
  misinformation_twt

covid_tweets_engagement.dt$problematic_speech <-
  covid_tweets_engagement.dt$tweet_id %in%
  problematic_speech_twt

# Calendar date of the tweet.
covid_tweets_engagement.dt$date <-
  as.Date(covid_tweets_engagement.dt$created_at)

# Interaction edge list: one row per reply, quote or retweet, pointing from
# the acting user to the user and tweet being acted on. `what` names the type
# of interaction; rows are stacked in the order reply, quote, retweet.
covid_rtqtrp_network.el <-
  dplyr::bind_rows(
    # replies
    covid_tweets.dt %>%
      dplyr::filter(!is.na(in_reply_to_user_id)) %>%
      dplyr::select(
        from_user = user_id,
        to_user = in_reply_to_user_id,
        to_tweet = in_reply_to_status_id,
        created_at,
        tweet_id
      ) %>%
      dplyr::mutate(what = 'reply'),
    # quotes
    covid_tweets.dt %>%
      dplyr::filter(!is.na(quoted_status_user_id)) %>%
      dplyr::select(
        from_user = user_id,
        to_user = quoted_status_user_id,
        to_tweet = quoted_status_id,
        created_at,
        tweet_id
      ) %>%
      dplyr::mutate(what = 'quote'),
    # retweets
    covid_tweets.dt %>%
      dplyr::filter(!is.na(retweeted_status_user_id)) %>%
      dplyr::select(
        from_user = user_id,
        to_user = retweeted_status_user_id,
        to_tweet = retweeted_status_id,
        created_at,
        tweet_id
      ) %>%
      dplyr::mutate(what = 'retweet')
  )

# Bin every tweet into a 2-hour and a 24-hour interval.
covid_tweets_engagement.dt$cut_2_hours <-
  cut(covid_tweets_engagement.dt$created_at, "2 hours")

covid_tweets_engagement.dt$cut_24_hours <-
  cut(covid_tweets_engagement.dt$created_at, "24 hours")

# Labels of the 2-hour bins; matchingFun is run once per label.
roll_seq <-
  levels(covid_tweets_engagement.dt$cut_2_hours)

covid_matching.df <-
  data.frame()

# matchingFun and matchingFun2 look these tables up under fixed global names.
rtqtrp_network.el <-
  covid_rtqtrp_network.el

tweets_engagement.dt <-
  covid_tweets_engagement.dt

print("Parallel on covid")

# Result container, filled one named entry per treatment definition.
covid_par_res <- list()

# 14 worker processes; each receives its own copy of the two tables.
cl <- makeCluster(14)

clusterExport(cl, varlist = c("rtqtrp_network.el", "tweets_engagement.dt"))

## 0: Random
# Three baseline runs. Each draws 1000 user ids at random and treats the
# tweets written by those users as the treated group.

for (covid_random_run in c("random_1", "random_2", "random_3")) {
  
  random_user_ids <-
    sample(unique(covid_users.dt$user_id), 1000)
  
  treat <-
    tweets_engagement.dt$user_id %in% random_user_ids
  
  # The workers read `treat` as a global, so it is re-sent before every run.
  clusterExport(cl, "treat")
  
  covid_par_res[[covid_random_run]] <-
    parLapply(cl, roll_seq, matchingFun)
  
}

rm(covid_random_run)

## 1-4: Far-right, journalists, politicians, government
# One run per author flag: the flag column is the treatment indicator and the
# result is stored under the column's name.

for (covid_user_group in c("far_right",
                           "journalist",
                           "politician",
                           "government")) {
  
  treat <-
    tweets_engagement.dt[[covid_user_group]]
  
  # The workers read `treat` as a global, so it is re-sent before every run.
  clusterExport(cl, "treat")
  
  covid_par_res[[covid_user_group]] <-
    parLapply(cl, roll_seq, matchingFun)
  
}

rm(covid_user_group)

# Opinions

# 1: Problematic speech (any of the three content flags) by far-right authors.
# This one is matched once over the pooled 24-hour bins with matchingFun2, in
# this session rather than on the workers.

treat <-
  tweets_engagement.dt$far_right &
  (tweets_engagement.dt$conspiracy_theory |
     tweets_engagement.dt$misinformation |
     tweets_engagement.dt$problematic_speech)

covid_par_res[['problematic_speech_broad_farright']] <-
  matchingFun2(treat)

# 2: Problematic speech, 3: Misinformation, 4: Conspiracy theory
# One windowed run per content flag, stored under the flag's name.

for (covid_content_flag in c("problematic_speech",
                             "misinformation",
                             "conspiracy_theory")) {
  
  treat <-
    tweets_engagement.dt[[covid_content_flag]]
  
  # The workers read `treat` as a global, so it is re-sent before every run.
  clusterExport(cl, "treat")
  
  covid_par_res[[covid_content_flag]] <-
    parLapply(cl, roll_seq, matchingFun)
  
}

rm(covid_content_flag)


## Links

# From here on only tweets that contain a link are analysed.
tweets_engagement.dt <- 
  tweets_engagement.dt[tweets_engagement.dt$url == TRUE,]

# The workers hold their own copy of the table, so the reduced one has to be
# sent again before any further run.
clusterExport(cl, "tweets_engagement.dt")

## 0: Random
# Three baseline runs on the link subset. All three draw their 1000 random
# users from the authors of the link tweets. Previously only the first run
# did so; the other two sampled from every user in covid_users.dt, so the
# replicates were not comparable with each other.

for (covid_random_link_run in c("random_link_1",
                                "random_link_2",
                                "random_link_3")) {
  
  random_user_ids <- 
    sample(unique(tweets_engagement.dt$user_id), 1000)
  
  treat <- 
    tweets_engagement.dt$user_id %in% random_user_ids
  
  # The workers read `treat` as a global, so it is re-sent before every run.
  clusterExport(cl, "treat")
  
  covid_par_res[[covid_random_link_run]] <-
    parLapply(cl, roll_seq, matchingFun)
  
}

rm(covid_random_link_run)


## 1-7: Link treatments
# One run per kind of link. Each entry is the treatment indicator for that
# run and its name is the key under which the result is stored:
#   1 low credibility, 2 high credibility, 3 pseudoscience-conspiracy,
#   4 Facebook, 5 Youtube, 6 mainstream media, 7 Twitter.

covid_link_treatments <- list(
  low_credibility =
    tweets_engagement.dt$url_credibility %in% "Low Credibility",
  high_credibility =
    tweets_engagement.dt$url_credibility %in% "High Credibility",
  conspiracy_link =
    tweets_engagement.dt$url_bias %in% "Conspiracy-Pseudoscience",
  facebook_link =
    tweets_engagement.dt$url_facebook,
  youtube_link =
    tweets_engagement.dt$url_youtube,
  mainstream_link =
    tweets_engagement.dt$url_mainstream,
  twitter_link =
    tweets_engagement.dt$url_twitter
)

for (covid_link_run in names(covid_link_treatments)) {
  
  treat <-
    covid_link_treatments[[covid_link_run]]
  
  # The workers read `treat` as a global, so it is re-sent before every run.
  clusterExport(cl, "treat")
  
  covid_par_res[[covid_link_run]] <-
    parLapply(cl, roll_seq, matchingFun)
  
}

rm(covid_link_treatments, covid_link_run)

# Persist all runs, then shut the workers down.
save(covid_par_res, file = "/pvol/rstudio/socialsense/R/analysis/covid_par_res.RData")


stopCluster(cl)
